; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=mips-linux-gnu -relocation-model=static < %s \
; RUN:   | FileCheck --check-prefixes=O32,O32-BE %s
; RUN: llc -mtriple=mipsel-linux-gnu -relocation-model=static < %s \
; RUN:   | FileCheck --check-prefixes=O32,O32-LE %s

; RUN-TODO: llc -mtriple=mips64-linux-gnu -relocation-model=static -target-abi o32 < %s \
; RUN-TODO:   | FileCheck --check-prefixes=O32 %s
; RUN-TODO: llc -mtriple=mips64el-linux-gnu -relocation-model=static -target-abi o32 < %s \
; RUN-TODO:   | FileCheck --check-prefixes=O32 %s

; RUN: llc -mtriple=mips64-linux-gnu -relocation-model=static -target-abi n32 < %s \
; RUN:   | FileCheck --check-prefixes=N32,N32-BE %s
; RUN: llc -mtriple=mips64el-linux-gnu -relocation-model=static -target-abi n32 < %s \
; RUN:   | FileCheck --check-prefixes=N32,N32-LE %s

; RUN: llc -mtriple=mips64-linux-gnu -relocation-model=static -target-abi n64 < %s \
; RUN:   | FileCheck --check-prefixes=N64,N64-BE %s
; RUN: llc -mtriple=mips64el-linux-gnu -relocation-model=static -target-abi n64 < %s \
; RUN:   | FileCheck --check-prefixes=N64,N64-LE %s

; Test struct returns for all ABI's and byte orders.

@struct_byte = global {i8} zeroinitializer
@struct_2byte = global {i8,i8} zeroinitializer
@struct_3xi16 = global {[3 x i16]} zeroinitializer
@struct_6xi32 = global {[6 x i32]} zeroinitializer
@struct_128xi16 = global {[128 x i16]} zeroinitializer

declare void @llvm.memcpy.p0.p0.i64(ptr nocapture, ptr nocapture readonly, i64, i1)

define inreg {i8} @ret_struct_i8() nounwind {
; O32-LABEL: ret_struct_i8:
; O32:       # %bb.0: # %entry
; O32-NEXT:    lui $1, %hi(struct_byte)
; O32-NEXT:    jr $ra
; O32-NEXT:    lbu $2, %lo(struct_byte)($1)
;
; N32-BE-LABEL: ret_struct_i8:
; N32-BE:       # %bb.0: # %entry
; N32-BE-NEXT:    lui $1, %hi(struct_byte)
; N32-BE-NEXT:    lb $1, %lo(struct_byte)($1)
; N32-BE-NEXT:    jr $ra
; N32-BE-NEXT:    dsll $2, $1, 56
;
; N32-LE-LABEL: ret_struct_i8:
; N32-LE:       # %bb.0: # %entry
; N32-LE-NEXT:    lui $1, %hi(struct_byte)
; N32-LE-NEXT:    jr $ra
; N32-LE-NEXT:    lb $2, %lo(struct_byte)($1)
;
; N64-BE-LABEL: ret_struct_i8:
; N64-BE:       # %bb.0: # %entry
; N64-BE-NEXT:    lui $1, %highest(struct_byte)
; N64-BE-NEXT:    daddiu $1, $1, %higher(struct_byte)
; N64-BE-NEXT:    dsll $1, $1, 16
; N64-BE-NEXT:    daddiu $1, $1, %hi(struct_byte)
; N64-BE-NEXT:    dsll $1, $1, 16
; N64-BE-NEXT:    lb $1, %lo(struct_byte)($1)
; N64-BE-NEXT:    jr $ra
; N64-BE-NEXT:    dsll $2, $1, 56
;
; N64-LE-LABEL: ret_struct_i8:
; N64-LE:       # %bb.0: # %entry
; N64-LE-NEXT:    lui $1, %highest(struct_byte)
; N64-LE-NEXT:    daddiu $1, $1, %higher(struct_byte)
; N64-LE-NEXT:    dsll $1, $1, 16
; N64-LE-NEXT:    daddiu $1, $1, %hi(struct_byte)
; N64-LE-NEXT:    dsll $1, $1, 16
; N64-LE-NEXT:    jr $ra
; N64-LE-NEXT:    lb $2, %lo(struct_byte)($1)
entry:
        %0 = load volatile {i8}, ptr @struct_byte
        ret {i8} %0
}

; This test is based on the way clang currently lowers {i8,i8} to {i16}.
; FIXME: It should probably work for without any lowering too but this doesn't
;        work as expected. Each member gets mapped to a register rather than
;        packed into a single register.
define inreg {i16} @ret_struct_i16() nounwind {
; O32-LABEL: ret_struct_i16:
; O32:       # %bb.0: # %entry
; O32-NEXT:    addiu $sp, $sp, -8
; O32-NEXT:    lui $1, %hi(struct_2byte)
; O32-NEXT:    lhu $1, %lo(struct_2byte)($1)
; O32-NEXT:    sh $1, 0($sp)
; O32-NEXT:    lhu $2, 0($sp)
; O32-NEXT:    jr $ra
; O32-NEXT:    addiu $sp, $sp, 8
;
; N32-BE-LABEL: ret_struct_i16:
; N32-BE:       # %bb.0: # %entry
; N32-BE-NEXT:    addiu $sp, $sp, -16
; N32-BE-NEXT:    lui $1, %hi(struct_2byte)
; N32-BE-NEXT:    lhu $1, %lo(struct_2byte)($1)
; N32-BE-NEXT:    sh $1, 8($sp)
; N32-BE-NEXT:    lh $1, 8($sp)
; N32-BE-NEXT:    dsll $2, $1, 48
; N32-BE-NEXT:    jr $ra
; N32-BE-NEXT:    addiu $sp, $sp, 16
;
; N32-LE-LABEL: ret_struct_i16:
; N32-LE:       # %bb.0: # %entry
; N32-LE-NEXT:    addiu $sp, $sp, -16
; N32-LE-NEXT:    lui $1, %hi(struct_2byte)
; N32-LE-NEXT:    lhu $1, %lo(struct_2byte)($1)
; N32-LE-NEXT:    sh $1, 8($sp)
; N32-LE-NEXT:    lh $2, 8($sp)
; N32-LE-NEXT:    jr $ra
; N32-LE-NEXT:    addiu $sp, $sp, 16
;
; N64-BE-LABEL: ret_struct_i16:
; N64-BE:       # %bb.0: # %entry
; N64-BE-NEXT:    daddiu $sp, $sp, -16
; N64-BE-NEXT:    lui $1, %highest(struct_2byte)
; N64-BE-NEXT:    daddiu $1, $1, %higher(struct_2byte)
; N64-BE-NEXT:    dsll $1, $1, 16
; N64-BE-NEXT:    daddiu $1, $1, %hi(struct_2byte)
; N64-BE-NEXT:    dsll $1, $1, 16
; N64-BE-NEXT:    lhu $1, %lo(struct_2byte)($1)
; N64-BE-NEXT:    sh $1, 8($sp)
; N64-BE-NEXT:    lh $1, 8($sp)
; N64-BE-NEXT:    dsll $2, $1, 48
; N64-BE-NEXT:    jr $ra
; N64-BE-NEXT:    daddiu $sp, $sp, 16
;
; N64-LE-LABEL: ret_struct_i16:
; N64-LE:       # %bb.0: # %entry
; N64-LE-NEXT:    daddiu $sp, $sp, -16
; N64-LE-NEXT:    lui $1, %highest(struct_2byte)
; N64-LE-NEXT:    daddiu $1, $1, %higher(struct_2byte)
; N64-LE-NEXT:    dsll $1, $1, 16
; N64-LE-NEXT:    daddiu $1, $1, %hi(struct_2byte)
; N64-LE-NEXT:    dsll $1, $1, 16
; N64-LE-NEXT:    lhu $1, %lo(struct_2byte)($1)
; N64-LE-NEXT:    sh $1, 8($sp)
; N64-LE-NEXT:    lh $2, 8($sp)
; N64-LE-NEXT:    jr $ra
; N64-LE-NEXT:    daddiu $sp, $sp, 16
entry:
        %retval = alloca {i8,i8}, align 8
        call void @llvm.memcpy.p0.p0.i64(ptr %retval, ptr @struct_2byte, i64 2, i1 false)
        %0 = load volatile {i16}, ptr %retval
        ret {i16} %0
}

; Ensure that structures bigger than 32-bits but smaller than 64-bits are
; also returned in the upper bits on big endian targets. Previously, these were
; missed by the CCPromoteToType and the shift didn't happen.
define inreg {i48} @ret_struct_3xi16() nounwind {
; O32-BE-LABEL: ret_struct_3xi16:
; O32-BE:       # %bb.0: # %entry
; O32-BE-NEXT:    lui $1, %hi(struct_3xi16)
; O32-BE-NEXT:    lw $2, %lo(struct_3xi16)($1)
; O32-BE-NEXT:    sll $3, $2, 16
; O32-BE-NEXT:    addiu $1, $1, %lo(struct_3xi16)
; O32-BE-NEXT:    lhu $1, 4($1)
; O32-BE-NEXT:    or $3, $1, $3
; O32-BE-NEXT:    jr $ra
; O32-BE-NEXT:    srl $2, $2, 16
;
; O32-LE-LABEL: ret_struct_3xi16:
; O32-LE:       # %bb.0: # %entry
; O32-LE-NEXT:    lui $1, %hi(struct_3xi16)
; O32-LE-NEXT:    lw $2, %lo(struct_3xi16)($1)
; O32-LE-NEXT:    addiu $1, $1, %lo(struct_3xi16)
; O32-LE-NEXT:    jr $ra
; O32-LE-NEXT:    lhu $3, 4($1)
;
; N32-BE-LABEL: ret_struct_3xi16:
; N32-BE:       # %bb.0: # %entry
; N32-BE-NEXT:    lui $1, %hi(struct_3xi16)
; N32-BE-NEXT:    lw $2, %lo(struct_3xi16)($1)
; N32-BE-NEXT:    dsll $2, $2, 32
; N32-BE-NEXT:    addiu $1, $1, %lo(struct_3xi16)
; N32-BE-NEXT:    lhu $1, 4($1)
; N32-BE-NEXT:    dsll $1, $1, 16
; N32-BE-NEXT:    jr $ra
; N32-BE-NEXT:    or $2, $2, $1
;
; N32-LE-LABEL: ret_struct_3xi16:
; N32-LE:       # %bb.0: # %entry
; N32-LE-NEXT:    lui $1, %hi(struct_3xi16)
; N32-LE-NEXT:    lwu $2, %lo(struct_3xi16)($1)
; N32-LE-NEXT:    addiu $1, $1, %lo(struct_3xi16)
; N32-LE-NEXT:    lh $1, 4($1)
; N32-LE-NEXT:    dsll $1, $1, 32
; N32-LE-NEXT:    jr $ra
; N32-LE-NEXT:    or $2, $2, $1
;
; N64-BE-LABEL: ret_struct_3xi16:
; N64-BE:       # %bb.0: # %entry
; N64-BE-NEXT:    lui $1, %highest(struct_3xi16)
; N64-BE-NEXT:    daddiu $1, $1, %higher(struct_3xi16)
; N64-BE-NEXT:    dsll $1, $1, 16
; N64-BE-NEXT:    daddiu $1, $1, %hi(struct_3xi16)
; N64-BE-NEXT:    dsll $1, $1, 16
; N64-BE-NEXT:    lw $2, %lo(struct_3xi16)($1)
; N64-BE-NEXT:    dsll $2, $2, 32
; N64-BE-NEXT:    daddiu $1, $1, %lo(struct_3xi16)
; N64-BE-NEXT:    lhu $1, 4($1)
; N64-BE-NEXT:    dsll $1, $1, 16
; N64-BE-NEXT:    jr $ra
; N64-BE-NEXT:    or $2, $2, $1
;
; N64-LE-LABEL: ret_struct_3xi16:
; N64-LE:       # %bb.0: # %entry
; N64-LE-NEXT:    lui $1, %highest(struct_3xi16)
; N64-LE-NEXT:    daddiu $1, $1, %higher(struct_3xi16)
; N64-LE-NEXT:    dsll $1, $1, 16
; N64-LE-NEXT:    daddiu $1, $1, %hi(struct_3xi16)
; N64-LE-NEXT:    dsll $1, $1, 16
; N64-LE-NEXT:    lwu $2, %lo(struct_3xi16)($1)
; N64-LE-NEXT:    daddiu $1, $1, %lo(struct_3xi16)
; N64-LE-NEXT:    lh $1, 4($1)
; N64-LE-NEXT:    dsll $1, $1, 32
; N64-LE-NEXT:    jr $ra
; N64-LE-NEXT:    or $2, $2, $1
entry:
        %0 = load volatile i48, ptr @struct_3xi16, align 2
        %1 = insertvalue {i48} undef, i48 %0, 0
        ret {i48} %1
}

; Ensure that large structures (>128-bit) are returned indirectly.
; We pick an extremely large structure so we don't have to match inlined memcpy's.
define void @ret_struct_128xi16(ptr sret({[128 x i16]}) %returnval) {
; O32-LABEL: ret_struct_128xi16:
; O32:       # %bb.0: # %entry
; O32-NEXT:    addiu $sp, $sp, -24
; O32-NEXT:    .cfi_def_cfa_offset 24
; O32-NEXT:    sw $ra, 20($sp) # 4-byte Folded Spill
; O32-NEXT:    sw $16, 16($sp) # 4-byte Folded Spill
; O32-NEXT:    .cfi_offset 31, -4
; O32-NEXT:    .cfi_offset 16, -8
; O32-NEXT:    move $16, $4
; O32-NEXT:    lui $1, %hi(struct_128xi16)
; O32-NEXT:    addiu $5, $1, %lo(struct_128xi16)
; O32-NEXT:    jal memcpy
; O32-NEXT:    addiu $6, $zero, 256
; O32-NEXT:    move $2, $16
; O32-NEXT:    lw $16, 16($sp) # 4-byte Folded Reload
; O32-NEXT:    lw $ra, 20($sp) # 4-byte Folded Reload
; O32-NEXT:    jr $ra
; O32-NEXT:    addiu $sp, $sp, 24
;
; N32-LABEL: ret_struct_128xi16:
; N32:       # %bb.0: # %entry
; N32-NEXT:    addiu $sp, $sp, -16
; N32-NEXT:    .cfi_def_cfa_offset 16
; N32-NEXT:    sd $ra, 8($sp) # 8-byte Folded Spill
; N32-NEXT:    sd $16, 0($sp) # 8-byte Folded Spill
; N32-NEXT:    .cfi_offset 31, -8
; N32-NEXT:    .cfi_offset 16, -16
; N32-NEXT:    lui $1, %hi(struct_128xi16)
; N32-NEXT:    addiu $5, $1, %lo(struct_128xi16)
; N32-NEXT:    sll $16, $4, 0
; N32-NEXT:    jal memcpy
; N32-NEXT:    daddiu $6, $zero, 256
; N32-NEXT:    move $2, $16
; N32-NEXT:    ld $16, 0($sp) # 8-byte Folded Reload
; N32-NEXT:    ld $ra, 8($sp) # 8-byte Folded Reload
; N32-NEXT:    jr $ra
; N32-NEXT:    addiu $sp, $sp, 16
;
; N64-LABEL: ret_struct_128xi16:
; N64:       # %bb.0: # %entry
; N64-NEXT:    daddiu $sp, $sp, -16
; N64-NEXT:    .cfi_def_cfa_offset 16
; N64-NEXT:    sd $ra, 8($sp) # 8-byte Folded Spill
; N64-NEXT:    sd $16, 0($sp) # 8-byte Folded Spill
; N64-NEXT:    .cfi_offset 31, -8
; N64-NEXT:    .cfi_offset 16, -16
; N64-NEXT:    move $16, $4
; N64-NEXT:    lui $1, %highest(struct_128xi16)
; N64-NEXT:    daddiu $1, $1, %higher(struct_128xi16)
; N64-NEXT:    dsll $1, $1, 16
; N64-NEXT:    daddiu $1, $1, %hi(struct_128xi16)
; N64-NEXT:    dsll $1, $1, 16
; N64-NEXT:    daddiu $5, $1, %lo(struct_128xi16)
; N64-NEXT:    jal memcpy
; N64-NEXT:    daddiu $6, $zero, 256
; N64-NEXT:    move $2, $16
; N64-NEXT:    ld $16, 0($sp) # 8-byte Folded Reload
; N64-NEXT:    ld $ra, 8($sp) # 8-byte Folded Reload
; N64-NEXT:    jr $ra
; N64-NEXT:    daddiu $sp, $sp, 16
entry:
        call void @llvm.memcpy.p0.p0.i64(ptr align 2 %returnval, ptr align 2 @struct_128xi16, i64 256, i1 false)
        ret void
}

; Ensure that large structures (>128-bit) are returned indirectly.
; This will generate inlined memcpy's anyway so pick the smallest large
; structure
; This time we let the backend lower the sret argument.
define {[6 x i32]} @ret_struct_6xi32() {
; O32-LABEL: ret_struct_6xi32:
; O32:       # %bb.0: # %entry
; O32-NEXT:    lui $1, %hi(struct_6xi32)
; O32-NEXT:    lw $2, %lo(struct_6xi32)($1)
; O32-NEXT:    addiu $1, $1, %lo(struct_6xi32)
; O32-NEXT:    lw $3, 4($1)
; O32-NEXT:    lw $5, 8($1)
; O32-NEXT:    lw $6, 12($1)
; O32-NEXT:    lw $7, 16($1)
; O32-NEXT:    lw $1, 20($1)
; O32-NEXT:    sw $1, 20($4)
; O32-NEXT:    sw $7, 16($4)
; O32-NEXT:    sw $6, 12($4)
; O32-NEXT:    sw $5, 8($4)
; O32-NEXT:    sw $3, 4($4)
; O32-NEXT:    jr $ra
; O32-NEXT:    sw $2, 0($4)
;
; N32-LABEL: ret_struct_6xi32:
; N32:       # %bb.0: # %entry
; N32-NEXT:    sll $1, $4, 0
; N32-NEXT:    lui $2, %hi(struct_6xi32)
; N32-NEXT:    lw $3, %lo(struct_6xi32)($2)
; N32-NEXT:    addiu $2, $2, %lo(struct_6xi32)
; N32-NEXT:    lw $4, 4($2)
; N32-NEXT:    lw $5, 8($2)
; N32-NEXT:    lw $6, 12($2)
; N32-NEXT:    lw $7, 16($2)
; N32-NEXT:    lw $2, 20($2)
; N32-NEXT:    sw $2, 20($1)
; N32-NEXT:    sw $7, 16($1)
; N32-NEXT:    sw $6, 12($1)
; N32-NEXT:    sw $5, 8($1)
; N32-NEXT:    sw $4, 4($1)
; N32-NEXT:    jr $ra
; N32-NEXT:    sw $3, 0($1)
;
; N64-LABEL: ret_struct_6xi32:
; N64:       # %bb.0: # %entry
; N64-NEXT:    lui $1, %highest(struct_6xi32)
; N64-NEXT:    daddiu $1, $1, %higher(struct_6xi32)
; N64-NEXT:    dsll $1, $1, 16
; N64-NEXT:    daddiu $1, $1, %hi(struct_6xi32)
; N64-NEXT:    dsll $1, $1, 16
; N64-NEXT:    lw $2, %lo(struct_6xi32)($1)
; N64-NEXT:    daddiu $1, $1, %lo(struct_6xi32)
; N64-NEXT:    lw $3, 4($1)
; N64-NEXT:    lw $5, 8($1)
; N64-NEXT:    lw $6, 12($1)
; N64-NEXT:    lw $7, 16($1)
; N64-NEXT:    lw $1, 20($1)
; N64-NEXT:    sw $1, 20($4)
; N64-NEXT:    sw $7, 16($4)
; N64-NEXT:    sw $6, 12($4)
; N64-NEXT:    sw $5, 8($4)
; N64-NEXT:    sw $3, 4($4)
; N64-NEXT:    jr $ra
; N64-NEXT:    sw $2, 0($4)
entry:
        %0 = load volatile {[6 x i32]}, ptr @struct_6xi32, align 2
        ret {[6 x i32]} %0
}
